{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Fine-tune CNN with pretrained VGG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Package loaded\n",
      "Current folder is /home/enginius/github/tensorflow-101/notebooks\n"
     ]
    }
   ],
   "source": [
    "# Import packs\n",
    "import numpy as np\n",
    "import os\n",
    "import scipy.io\n",
    "from scipy.misc import imread, imresize\n",
    "import matplotlib.pyplot as plt\n",
    "import skimage.io\n",
    "import skimage.transform\n",
    "import tensorflow as tf\n",
    "%matplotlib inline  \n",
    "cwd = os.getcwd()\n",
    "print (\"Package loaded\")\n",
    "print (\"Current folder is %s\" % (cwd) )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load images and resize them to make a dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of total images is 87 (train: 69, test: 18)\n",
      "Shape of an image is (64, 64, 3)\n"
     ]
    }
   ],
   "source": [
    "# Configure the locations of the images and reshaping sizes\n",
    "# ------------------------------------------------------------------- #\n",
    "paths = {\"images/cats\", \"images/dogs\"}\n",
    "imgsize = [64, 64]      # The reshape size\n",
    "use_gray = 0            # Grayscale\n",
    "data_name = \"data4vgg\"  # Save name\n",
    "valid_exts = [\".jpg\",\".gif\",\".png\",\".tga\", \".jpeg\"]\n",
    "# ------------------------------------------------------------------- #\n",
    "\n",
    "imgcnt = 0\n",
    "nclass = len(paths)\n",
    "for relpath in paths:\n",
    "    fullpath = cwd + \"/\" + relpath\n",
    "    flist = os.listdir(fullpath)\n",
    "    for f in flist:\n",
    "        if os.path.splitext(f)[1].lower() not in valid_exts:\n",
    "            continue\n",
    "        fullpath = os.path.join(fullpath, f)\n",
    "        imgcnt = imgcnt + 1\n",
    "# Grayscale\n",
    "def rgb2gray(rgb):\n",
    "    if len(rgb.shape) is 3:\n",
    "        return np.dot(rgb[...,:3], [0.299, 0.587, 0.114])\n",
    "    else:\n",
    "        print (\"Current Image is GRAY!\")\n",
    "        return rgb\n",
    "if use_gray:\n",
    "    totalimg   = np.ndarray((imgcnt, imgsize[0]*imgsize[1]))\n",
    "else:\n",
    "    totalimg   = np.ndarray((imgcnt, imgsize[0]*imgsize[1]*3))\n",
    "totallabel = np.ndarray((imgcnt, nclass))\n",
    "imgcnt     = 0\n",
    "for i, relpath in zip(range(nclass), paths):\n",
    "    path = cwd + \"/\" + relpath\n",
    "    flist = os.listdir(path)\n",
    "    for f in flist:\n",
    "        if os.path.splitext(f)[1].lower() not in valid_exts:\n",
    "            continue\n",
    "        fullpath = os.path.join(path, f)\n",
    "        currimg  = imread(fullpath)\n",
    "        # Convert to grayscale  \n",
    "        if use_gray:\n",
    "            grayimg  = rgb2gray(currimg)\n",
    "        else:\n",
    "            grayimg  = currimg\n",
    "        # Reshape\n",
    "        graysmall = imresize(grayimg, [imgsize[0], imgsize[1]])/255.\n",
    "        grayvec   = np.reshape(graysmall, (1, -1))\n",
    "        # Save \n",
    "        totalimg[imgcnt, :] = grayvec\n",
    "        totallabel[imgcnt, :] = np.eye(nclass, nclass)[i]\n",
    "        imgcnt    = imgcnt + 1\n",
    "        \n",
    "# Divide total data into training and test set\n",
    "randidx    = np.random.randint(imgcnt, size=imgcnt)\n",
    "trainidx   = randidx[0:int(4*imgcnt/5)]\n",
    "testidx    = randidx[int(4*imgcnt/5):imgcnt]\n",
    "trainimg   = totalimg[trainidx, :]\n",
    "trainlabel = totallabel[trainidx, :]\n",
    "testimg    = totalimg[testidx, :]\n",
    "testlabel  = totallabel[testidx, :]\n",
    "ntrain     = trainimg.shape[0]\n",
    "nclass     = trainlabel.shape[1]\n",
    "dim        = trainimg.shape[1]\n",
    "ntest      = testimg.shape[0]\n",
    "\n",
    "print (\"Number of total images is %d (train: %d, test: %d)\" \n",
    "       % (imgcnt, ntrain, ntest)) \n",
    "print (\"Shape of an image is (%d, %d, %d)\" % (imgsize[0], imgsize[1], 3))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define VGG network structure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "VGG net ready\n"
     ]
    }
   ],
   "source": [
    "def net(data_path, input_image):\n",
    "    layers = (\n",
    "        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',\n",
    "        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',\n",
    "        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',\n",
    "        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',\n",
    "        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',\n",
    "        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',\n",
    "        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',\n",
    "        'relu5_3', 'conv5_4', 'relu5_4'\n",
    "    )\n",
    "    data = scipy.io.loadmat(data_path)\n",
    "    mean = data['normalization'][0][0][0]\n",
    "    mean_pixel = np.mean(mean, axis=(0, 1))\n",
    "    weights = data['layers'][0]\n",
    "    net = {}\n",
    "    current = input_image\n",
    "    for i, name in enumerate(layers):\n",
    "        kind = name[:4]\n",
    "        if kind == 'conv':\n",
    "            kernels, bias = weights[i][0][0][0][0]\n",
    "            # matconvnet: weights are [width, height, in_channels, out_channels]\n",
    "            # tensorflow: weights are [height, width, in_channels, out_channels]\n",
    "            kernels = np.transpose(kernels, (1, 0, 2, 3))\n",
    "            bias = bias.reshape(-1)\n",
    "            current = _conv_layer(current, kernels, bias)\n",
    "        elif kind == 'relu':\n",
    "            current = tf.nn.relu(current)\n",
    "        elif kind == 'pool':\n",
    "            current = _pool_layer(current)\n",
    "        net[name] = current\n",
    "    assert len(net) == len(layers)\n",
    "    return net, mean_pixel\n",
    "def _conv_layer(input, weights, bias):\n",
    "    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),\n",
    "            padding='SAME')\n",
    "    return tf.nn.bias_add(conv, bias)\n",
    "def _pool_layer(input):\n",
    "    return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),\n",
    "            padding='SAME')\n",
    "def preprocess(image, mean_pixel):\n",
    "    return image - mean_pixel\n",
    "def unprocess(image, mean_pixel):\n",
    "    return image + mean_pixel\n",
    "print (\"VGG net ready\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute convoultional feature maps using VGG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of trainimg_tensor is (69, 64, 64, 3)\n",
      "Shape of trainimg_tensor is (18, 64, 64, 3)\n",
      "Convolutional map extraction done\n"
     ]
    }
   ],
   "source": [
    "# Preprocess\n",
    "trainimg_tensor = np.ndarray((ntrain, imgsize[0], imgsize[1], 3))\n",
    "testimg_tensor = np.ndarray((ntest, imgsize[0], imgsize[1], 3))\n",
    "for i in range(ntrain):\n",
    "    currimg = trainimg[i, :]\n",
    "    currimg = np.reshape(currimg, [imgsize[0], imgsize[1], 3])\n",
    "    trainimg_tensor[i, :, :, :] = currimg \n",
    "print (\"Shape of trainimg_tensor is %s\" % (trainimg_tensor.shape,))    \n",
    "\n",
    "for i in range(ntest):\n",
    "    currimg = testimg[i, :]\n",
    "    currimg = np.reshape(currimg, [imgsize[0], imgsize[1], 3])\n",
    "    testimg_tensor[i, :, :, :] = currimg \n",
    "print (\"Shape of trainimg_tensor is %s\" % (testimg_tensor.shape,))\n",
    "    \n",
    "# Get conv features\n",
    "VGG_PATH = cwd + \"/data/imagenet-vgg-verydeep-19.mat\"\n",
    "with tf.Graph().as_default(), tf.Session() as sess:\n",
    "    with tf.device(\"/cpu:0\"):\n",
    "        img_placeholder = tf.placeholder(tf.float32\n",
    "                                         , shape=(None, imgsize[0], imgsize[1], 3))\n",
    "        nets, mean_pixel = net(VGG_PATH, img_placeholder)\n",
    "        train_features = nets['relu5_4'].eval(feed_dict={img_placeholder: trainimg_tensor})\n",
    "        test_features  = nets['relu5_4'].eval(feed_dict={img_placeholder: testimg_tensor})\n",
    "print(\"Convolutional map extraction done\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Our conv feature map looks like"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of 'train_features' is (69, 4, 4, 512)\n",
      "Shape of 'test_features' is (18, 4, 4, 512)\n"
     ]
    }
   ],
   "source": [
    "print (\"Shape of 'train_features' is %s\" % (train_features.shape,))\n",
    "print (\"Shape of 'test_features' is %s\" % (test_features.shape,))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Vectorize "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Shape of 'train_vectorized' is (69, 4, 4, 512)\n",
      "Shape of 'test_vectorized' is (18, 4, 4, 512)\n"
     ]
    }
   ],
   "source": [
    "# Vectorize\n",
    "train_vectorized = np.ndarray((ntrain, 4*4*512))\n",
    "test_vectorized  = np.ndarray((ntest, 4*4*512))\n",
    "for i in range(ntrain):\n",
    "    curr_feat = train_features[i, :, :, :]\n",
    "    curr_feat_vec = np.reshape(curr_feat, (1, -1))\n",
    "    train_vectorized[i, :] = curr_feat_vec\n",
    "for i in range(ntest):\n",
    "    curr_feat = test_features[i, :, :, :]\n",
    "    curr_feat_vec = np.reshape(curr_feat, (1, -1))\n",
    "    test_vectorized[i, :] = curr_feat_vec\n",
    "    \n",
    "print (\"Shape of 'train_vectorized' is %s\" % (train_features.shape,))\n",
    "print (\"Shape of 'test_vectorized' is %s\" % (test_features.shape,))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Define MLP for finetuning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Network Ready to Go!\n"
     ]
    }
   ],
   "source": [
    "# Parameters\n",
    "learning_rate   = 0.0001\n",
    "training_epochs = 100\n",
    "batch_size      = 100\n",
    "display_step    = 10\n",
    "# tf Graph input\n",
    "x = tf.placeholder(tf.float32, [None, 4*4*512])\n",
    "y = tf.placeholder(tf.float32, [None, nclass])\n",
    "keepratio = tf.placeholder(tf.float32)\n",
    "# Network\n",
    "with tf.device(\"/cpu:0\"):\n",
    "    n_input  = dim\n",
    "    n_output = nclass\n",
    "    weights  = {\n",
    "        'wd1': tf.Variable(tf.random_normal([4*4*512, 1024], stddev=0.1)),\n",
    "        'wd2': tf.Variable(tf.random_normal([1024, n_output], stddev=0.1))\n",
    "    }\n",
    "    biases   = {\n",
    "        'bd1': tf.Variable(tf.random_normal([1024], stddev=0.1)),\n",
    "        'bd2': tf.Variable(tf.random_normal([n_output], stddev=0.1))\n",
    "    }\n",
    "    def conv_basic(_input, _w, _b, _keepratio):\n",
    "        # Input\n",
    "        _input_r = _input\n",
    "        # Vectorize\n",
    "        _dense1 = tf.reshape(_input_r, [-1, _w['wd1'].get_shape().as_list()[0]])\n",
    "        # Fc1\n",
    "        _fc1 = tf.nn.relu(tf.add(tf.matmul(_dense1, _w['wd1']), _b['bd1']))\n",
    "        _fc_dr1 = tf.nn.dropout(_fc1, _keepratio)\n",
    "        # Fc2\n",
    "        _out = tf.add(tf.matmul(_fc_dr1, _w['wd2']), _b['bd2'])\n",
    "        # Return everything\n",
    "        out = {'input_r': _input_r, 'dense1': _dense1,\n",
    "            'fc1': _fc1, 'fc_dr1': _fc_dr1, 'out': _out }\n",
    "        return out\n",
    "    # Functions! \n",
    "    _pred = conv_basic(x, weights, biases, keepratio)['out']\n",
    "    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(_pred, y))\n",
    "    optm = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)\n",
    "    _corr = tf.equal(tf.argmax(_pred,1), tf.argmax(y,1)) \n",
    "    accr = tf.reduce_mean(tf.cast(_corr, tf.float32)) \n",
    "    init = tf.initialize_all_variables()\n",
    "\n",
    "print (\"Network Ready to Go!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Optimize"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 000/100 cost: 7.404936314\n",
      " Training accuracy: 0.470\n",
      " Test accuracy: 0.333\n",
      "Epoch: 010/100 cost: 0.879274964\n",
      " Training accuracy: 0.770\n",
      " Test accuracy: 0.833\n",
      "Epoch: 020/100 cost: 0.226866707\n",
      " Training accuracy: 0.950\n",
      " Test accuracy: 0.889\n",
      "Epoch: 030/100 cost: 0.141039506\n",
      " Training accuracy: 0.940\n",
      " Test accuracy: 1.000\n",
      "Epoch: 040/100 cost: 0.006388153\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Epoch: 050/100 cost: 0.000061489\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Epoch: 060/100 cost: 0.000031058\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Epoch: 070/100 cost: 0.000004128\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Epoch: 080/100 cost: 0.000000439\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Epoch: 090/100 cost: 0.000000286\n",
      " Training accuracy: 1.000\n",
      " Test accuracy: 0.944\n",
      "Optimization Finished!\n"
     ]
    }
   ],
   "source": [
    "# Launch the graph\n",
    "sess = tf.Session()\n",
    "sess.run(init)\n",
    "\n",
    "# Training cycle\n",
    "for epoch in range(training_epochs):\n",
    "    avg_cost = 0.\n",
    "    num_batch = int(ntrain/batch_size)+1\n",
    "    # Loop over all batches\n",
    "    for i in range(num_batch): \n",
    "        randidx  = np.random.randint(ntrain, size=batch_size)\n",
    "        batch_xs = train_vectorized[randidx, :]\n",
    "        batch_ys = trainlabel[randidx, :]                \n",
    "        # Fit training using batch data\n",
    "        sess.run(optm, feed_dict={x: batch_xs, y: batch_ys, keepratio:0.7})\n",
    "        # Compute average loss\n",
    "        avg_cost += sess.run(cost, feed_dict={x: batch_xs, y: batch_ys, keepratio:1.})/num_batch\n",
    "\n",
    "    # Display logs per epoch step\n",
    "    if epoch % display_step == 0:\n",
    "        print (\"Epoch: %03d/%03d cost: %.9f\" % (epoch, training_epochs, avg_cost))\n",
    "        train_acc = sess.run(accr, feed_dict={x: batch_xs, y: batch_ys, keepratio:1.})\n",
    "        print (\" Training accuracy: %.3f\" % (train_acc))\n",
    "        test_acc = sess.run(accr, feed_dict={x: test_vectorized, y: testlabel, keepratio:1.})\n",
    "        print (\" Test accuracy: %.3f\" % (test_acc))\n",
    "\n",
    "print (\"Optimization Finished!\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
